home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
9-Digit Zip Code Directory
/
9-Digit Zip Code Directory (American Business Information) (ABIZIP-12).ISO
/
z4src.zip
/
CPARITH.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-08-13
|
18KB
|
662 lines
//----------------------------------------------------------------------------
// MODULE DESCRIPTION
//
// Module: cparith.c
// Title: Compress Library
// Notice: John M. Weeder
// Copyright (c) 1993. All rights reserved.
// This module contains proprietary information and should be
// treated as confidential.
//
//----------------------------------------------------------------------------
// MAINTENANCE HISTORY
//
// $Workfile$
// $Revision$
// $Author$
// $Date$
// $Log$
//
//----------------------------------------------------------------------------
// MODULE NARRATIVE
//
//
// This module contains arithmetic encoding and decoding routines.
// Due to allowances for multi-character sequences, encoding is slow. However,
// decoding is not affected.
//
// Encoding and decoding are placed in the same module to aid in optimization.
// Since the total amount of code is small, there is not much overhead.
//
// The decoding routine must read past the end of the buffer in order to
// decode a string. The decode buffer should always be 2 bytes larger than
// the data to be decoded!
//
//
// THESE ROUTINES ARE NOT RE-ENTRANT.
//
// The code in this module should be written entirely in C.
// Do not use any C++ constructs.
//
// This module is portable to:
// DOS 3.X+
// MS Windows 3.X+
// OS/2 2.X+
// OS/2 2.0 PM
// SCO UNIX.
//
// The following compilers are supported:
// MSC 6.0A
// MSC/C++ 7.0
// Borland C++ 3.1 for DOS
// Borland C++ 1.0 for OS/2 2.X
// SCO UNIX cc
//
//----------------------------------------------------------------------------
#include <cp.h>
//----------------------------------------------------------------------------
// Constants
//----------------------------------------------------------------------------
//
// Maximum number of symbols which may be encoded
// An extra symbol is always included in this -- representing the terminating
// symbol. Therefore, a maximum of MAX_SYMBOLS-1 are available to the end
// user.
//
#define MAX_SYMBOLS (256)
//
// Maximum allowed frequency count 2^14 - 1
//
#define MAX_FREQUENCY (16383)
//
// Code value definition
//
#define CODE_VALUE_BITS (16) // Bit in a code value
typedef long CODE_VALUE; // Code value representation
#define TOP_VALUE ((CODE_VALUE)(((CODE_VALUE)1<<CODE_VALUE_BITS)-1))
#define FIRST_QTR ((CODE_VALUE)(TOP_VALUE/4+1))
#define HALF ((CODE_VALUE)(2*FIRST_QTR))
#define THIRD_QTR ((CODE_VALUE)(3*FIRST_QTR))
//----------------------------------------------------------------------------
// Variables
//----------------------------------------------------------------------------
static PCP_ARITH parith; // Current model's statistics
static SIZET acCumFreq[MAX_SYMBOLS+1]; // Cumulative frequencies
static SIZET cSymbols; // Number of symbols
#define cEof (cSymbols) // EOF symbol
static BYTE bBuffer; // Bits buffered for output
static SIZET cBits2Go; // Number of bits free in buffer
static PBYTE pbBuffer; // Input/output buffer pointer
static SIZET cBytes; // Bytes input or output
static SIZET cMaxBytes; // Maximum input or output bytes
static SIZET cGarbageBytes; // Number of bytes past end_of_file
static CODE_VALUE value; // Currently-seen code value */
static CODE_VALUE low; // Ends of current code region */
static CODE_VALUE high; // Ends of the current code region */
static SIZET cBits2Follow; // Number of opposite bits to output after
// the next bit.
//----------------------------------------------------------------------------
// Prototypes
//----------------------------------------------------------------------------
static int __arithsort__(const void *, const void *);
static VOID FN_L ArithAdjustSymbol(PCP_ARITH _parith, SIZET i);
static int FN_L ArithBitInput(void);
static VOID FN_L ArithBitOutput(int);
static VOID FN_L ArithBitOutputFollow(int);
static SIZET FN_L ArithDecodeSymbol(void);
static VOID FN_L ArithEncodeSymbol(SIZET);
//----------------------------------------------------------------------------
// Description: Sort routine used by qsort to sort model statistics.
// Sorts in decreasing order of frequency
// Parameters: see qsort()
// Returns: see qsort()
//----------------------------------------------------------------------------
static int __arithsort__(const void *pv1, const void *pv2)
{
PCP_ARITH parith1 = (PCP_ARITH)pv1;
PCP_ARITH parith2 = (PCP_ARITH)pv2;
if (parith1->cFreq > parith2->cFreq)
return -1;
else if (parith1->cFreq < parith2->cFreq)
return 1;
return 0;
}
//----------------------------------------------------------------------------
// Description: Adjust statistics of model based on input string.
// Note that the frequency of the last element (the EOF
// element) is incremented for each string
// Parameters: _parith Model to adjust
// psz String to adjust model with.
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
VOID FN_E ArithAdjust(PCP_ARITH _parith, PSZ psz)
{
SIZET i;
for (i = 0; _parith[i].szSymbol[0]; ++i)
_parith[i].cLen = strlen(_parith[i].szSymbol);
for (; psz[0]; psz++) // Adjust symbol count for each symbol
{ // in the string
SIZET cLen = 0;
SIZET cSymbol;
for (i = 0; _parith[i].szSymbol[0]; ++i)
{
if (strncmp(psz, _parith[i].szSymbol, _parith[i].cLen) == 0
&& _parith[i].cLen)
{
cSymbol = i;
cLen = parith[i].cLen;
}
}
NOTUSED(cLen);
Assert(cLen);
ArithAdjustSymbol(_parith, cSymbol);
}
// Find last symbol
for (i = 0; _parith[i].szSymbol[0]; ++i)
;
ArithAdjustSymbol(_parith, i); // Increment by one
return ;
}
//----------------------------------------------------------------------------
// Description: Adjust statistics of a model symbol
// The frequency is adjusted by the size of the string.
// Frequencies of zero are allowed!!
// Parameters: _parith Model to adjust
// i Element to increment
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static VOID FN_L ArithAdjustSymbol(PCP_ARITH _parith, SIZET i)
{
SIZET j;
_parith[i].cFreq += MAX(_parith[i].cLen, 1);
if (_parith[i].cFreq >= MAX_FREQUENCY)
{
for (j = 0; _parith[j].szSymbol[0]; ++j)
{
if (_parith[i].cFreq)
{
_parith[i].cFreq >>= 1;
if (!_parith[i].cFreq)
_parith[i].cFreq = 1;
}
}
}
return ;
}
//----------------------------------------------------------------------------
// Description: Input a bit
// Parameters:
// Returns: Bit
//----------------------------------------------------------------------------
static int FN_L ArithBitInput(void)
{
int iBit;
if (cBits2Go == 0)
{
if (cBytes >= cMaxBytes)
{
bBuffer = 0x01; // Garbage bits!!
cGarbageBytes++; // No more than 2 extra bytes allowed
Assert(cGarbageBytes <= 2);
}
else
bBuffer = *pbBuffer++;
cBytes++;
cBits2Go = 8;
}
iBit = bBuffer & 1;
bBuffer >>= 1;
cBits2Go--;
return iBit;
}
//----------------------------------------------------------------------------
// Description: Output a bit
// Parameters: iBit Bit to output
// Returns:
//----------------------------------------------------------------------------
static VOID FN_L ArithBitOutput(int iBit)
{
bBuffer >>= 1;
if (iBit)
bBuffer |= 0x80;
cBits2Go--;
if (cBits2Go == 0)
{
*pbBuffer++ = bBuffer;
cBytes++;
cBits2Go = 8;
}
return ;
}
//----------------------------------------------------------------------------
// Description: Output final bits
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
static VOID FN_L ArithBitOutputFollow(int iBit)
{
ArithBitOutput(iBit);
for (; cBits2Follow; --cBits2Follow)
ArithBitOutput(!iBit);
return ;
}
//----------------------------------------------------------------------------
// Description: Decode a buffer
// Parameters: pbDecode Pointer to buffer to decode.
// pcDecode Pointer to size of buffer. Size is adjusted
// by the number of bytes decoded.
// pch Buffer to place decoded text into.
// Null terminator is added to buffer.
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
BOOL FN_E ArithDecode(PBYTE pbDecode, PSIZET pcDecode, PCHAR pch)
{
SIZET i;
Assert(pbDecode && pcDecode && pch);
Assert(*pcDecode >= 2);
pbBuffer = pbDecode;
cBits2Go = 0;
cGarbageBytes = 0;
cBytes = 0;
cMaxBytes = *pcDecode;
value = 0;
for (i = 1; i <= CODE_VALUE_BITS; i++)
{
value <<= 1;
value += (CODE_VALUE)ArithBitInput();
}
low = 0;
high = TOP_VALUE;
for (;;)
{
SIZET cLen; // Decode a symbol
SIZET cSymbol = ArithDecodeSymbol();
if (cSymbol == cEof)
{
pch[0] = '\0'; // Add null terminator
cBytes -= 2; // Always 2 garbage bytes
if (cBytes <= *pcDecode) // Adjust number of remaining bytes
*pcDecode -= cBytes;
else
*pcDecode = 0;
break;
} // Verify decoded string fits
cLen = strlen(parith[cSymbol-1].szSymbol);
; // Copy string to buffer
strcpy(pch, parith[cSymbol-1].szSymbol);
pch += cLen;
}
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Decode a symbol
// Parameters:
// Returns: Symbol (1..cSymbols)
//----------------------------------------------------------------------------
static SIZET FN_L ArithDecodeSymbol(void)
{
CODE_VALUE range; // Size of current code region
SIZET cCum; // Cumulative frequency calculated
SIZET cSymbol; // Symbol decoded
range = (CODE_VALUE)(high - low) + 1;
cCum = (SIZET)((((CODE_VALUE)(value - low) + 1) * acCumFreq[0] - 1)/range);
for (cSymbol = 1; acCumFreq[cSymbol] > cCum; cSymbol++)
;
high = low + (range * acCumFreq[cSymbol - 1]) / acCumFreq[0] - 1;
low += (range * acCumFreq[cSymbol]) / acCumFreq[0];
for (;;)
{
if (high < HALF)
{
/* nothing */ ;
}
else if (low >= HALF)
{
value -= HALF;
low -= HALF;
high -= HALF;
}
else if (low >= FIRST_QTR && high < THIRD_QTR)
{
value -= FIRST_QTR;
low -= FIRST_QTR;
high -= FIRST_QTR;
}
else
break;
low <<= 1;
high <<= 1;
high++;
value <<= 1;
value += (CODE_VALUE)ArithBitInput();
}
return cSymbol;
}
//----------------------------------------------------------------------------
// Description: Encode a buffer
// Parameters: pbEncode Pointer to buffer to encode into.
// pcEncode Pointer to size of buffer. Size is adjusted
// by the number of bytes encoded.
// psz Buffer to read text from.
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
BOOL FN_E ArithEncode(PBYTE pbEncode, PSIZET pcEncode, PSZ psz)
{
Assert(pbEncode && pcEncode && psz);
Assert(pcEncode[0]);
Assert(psz[0]);
pbBuffer = pbEncode;
bBuffer = 0; // Initialize
cBits2Go = 8;
cBytes = 0;
cBits2Follow = 0; // No bits to follow next
low = 0; // Full code range
high = TOP_VALUE;
while (psz[0])
{
SIZET i;
SIZET cSymbol;
SIZET cLen = 0;
// Find longest symbol
for (i = 0; parith[i].szSymbol[0]; ++i)
{
if (strncmp(psz, parith[i].szSymbol, parith[i].cLen) == 0
&& parith[i].cLen > cLen)
{
cSymbol = i;
cLen = parith[i].cLen;
}
}
Assert(cLen); // Verify valid symbol
Assert(parith[cSymbol].cFreq); // Verify allowed symbol!
ArithEncodeSymbol(cSymbol+1); // Encode symbol
if (cBytes >= *pcEncode) // Check buffer overflow
return FALSE;
// Move to next character
psz += cLen;
}
ArithEncodeSymbol(cEof); // Encode terminator
cBits2Follow++; // And follow bits
if (low < FIRST_QTR)
ArithBitOutputFollow(0);
else
ArithBitOutputFollow(1);
if (cBits2Go != 8)
{
if (cBytes >= *pcEncode)
return FALSE;
*pbBuffer++ = (bBuffer >> cBits2Go);
cBytes++;
}
*pcEncode = cBytes;
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Encode a symbol
// Parameters: cSymbol Symbol to encode
// Returns:
//----------------------------------------------------------------------------
static VOID FN_L ArithEncodeSymbol(SIZET cSymbol)
{
CODE_VALUE range;
range = (CODE_VALUE)(high - low) + 1;
high = low + (range * acCumFreq[cSymbol - 1]) / acCumFreq[0] - 1;
low = low + (range * acCumFreq[cSymbol]) / acCumFreq[0];
for (;;)
{
if (high < HALF)
{
ArithBitOutputFollow(0);
}
else if (low >= HALF)
{
ArithBitOutputFollow(1);
low -= HALF;
high -= HALF;
}
else if (low >= FIRST_QTR && high < THIRD_QTR)
{
cBits2Follow++;
low -= FIRST_QTR;
high -= FIRST_QTR;
}
else
break;
low <<= 1;
high <<= 1;
high++;
}
return ;
}
//----------------------------------------------------------------------------
// Description:
// Parameters:
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
BOOL FN_E ArithInitialize(PCP_ARITH _parith)
{
SIZET i;
BOOL fDone;
parith = _parith; // Store model
if (!parith[cEof-1].cFreq) // End of file must have freq
parith[cEof-1].cFreq = 1;
//
// Count symbols
//
cSymbols = 0;
do
{
parith[cSymbols].cLen = strlen(parith[cSymbols].szSymbol);
Assert(parith[cSymbols].cLen < 4);
cSymbols++;
}
while (parith[cSymbols-1].szSymbol[0]);
Assert(cSymbols <= MAX_SYMBOLS); // Sort (except EOF symbol)
qsort(parith, cSymbols - 1, sizeof(CP_ARITH), __arithsort__);
do // Verify frequencies of model
{
SIZET cFreq = 0;
fDone = TRUE;
for (i = 0; i < cEof; i++)
if (cFreq + parith[i].cFreq > (SIZET)MAX_FREQUENCY)
{
fDone = FALSE;
break;
}
else
cFreq += parith[i].cFreq;
if (!fDone)
{
for (i = 0; i < cEof; i++)
{
if (parith[i].cFreq)
{
parith[i].cFreq >>= 1;
if (!parith[i].cFreq)
parith[i].cFreq = 1;
}
}
}
}
while (!fDone);
acCumFreq[cEof] = 0; // Create cumulative freqs
for (i = cEof; i > 0; i--)
acCumFreq[i-1] = acCumFreq[i] + parith[i-1].cFreq;
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Run standard test suite
// Parameters: sTest Test to run.
// 0 Run all default tests (except).
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
#if COMPILE_TEST
BOOL FN ArithTest(SHORT sTest)
{
static CP_ARITH aarith[255] =
{
{ 0, "d" },
{ 0, "e" },
{ 0, "f" },
{ 0, "g" },
{ 0, "h" },
{ 0, "i" },
{ 0, "j" },
{ 0, "k" },
{ 0, "l" },
{ 0, "m" },
{ 0, "n" },
{ 0, "o" },
{ 0, "p" },
{ 0, "q" },
{ 0, "a" },
{ 0, "aa" },
{ 0, "aaa" },
{ 0, "b" },
{ 0, "bb" },
{ 0, "bbb" },
{ 0, "c" },
{ 0, "cc" },
{ 0, "ccc" },
{ 0, "" }
};
static PSZ pszTest1 = "abcaabbccaaabbbccc";
static PSZ pszTest2 =
"aaaaaaaaaaa"
"bbbbbbbbbbb"
"ccccccccccc"
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"
"abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
"abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
"abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
"aabbccaabbccaabbccaabbccaabbccaabbccaabbccaabbccaabbccaabbccaabbccaabbcc";
static CHAR szResult[1024];
static BYTE abEncode[1024];
static SIZET cEncode;
SIZET cInput, cOutput;
LONG lPercent;
if (!sTest || sTest == 1)
{
ArithAdjust(aarith, pszTest1);
ArithAdjust(aarith, pszTest2);
if (!ArithInitialize(aarith))
return FALSE;
cEncode = sizeof(abEncode);
if (!ArithEncode(abEncode, &cEncode, pszTest1))
return FALSE;
cOutput = cEncode;
cInput = strlen(pszTest1)+1;
if (!ArithDecode(abEncode, &cEncode, szResult))
return FALSE;
if (strcmp(szResult, pszTest1) != 0)
return FALSE;
lPercent = ((LONG)cOutput * 100L) / (LONG)cInput;
Output("Input: %-5d Output: %-5d %3ld %%\n", cInput, cOutput, 100 - lPercent);
cEncode = sizeof(abEncode);
if (!ArithEncode(abEncode, &cEncode, pszTest2))
return FALSE;
cOutput = cEncode;
cInput = strlen(pszTest2)+1;
if (!ArithDecode(abEncode, &cEncode, szResult))
return FALSE;
if (strcmp(szResult, pszTest2) != 0)
return FALSE;
lPercent = ((LONG)cOutput * 100L) / (LONG)cInput;
Output("Input: %-5d Output: %-5d %3ld %%\n", cInput, cOutput, 100 - lPercent);
}
else if (!sTest || sTest == 2)
{
// read into buffer encode decode and compare!!!
/* file test */ ;
}
else if (!sTest || sTest == 3)
{
/* load and save a model! */ ;
}
return TRUE;
}
#endif
//----------------------------------------------------------------------------
//------------------------------- End of File --------------------------------
//----------------------------------------------------------------------------